Filter Performance and Stability
Measure and compare performance of the Standard Filter vs. the Square Root Filter, CPU vs. GPU, and batched vs. unbatched execution.
KalmanFilterPerformance
KalmanFilterPerformance (n_obs=100, n_dim_obs=4, n_dim_state=3, n_dim_contr=3, bs=5, p_missing=0.3, init_method='random', use_sr_filter=True, device='cpu', use_conditional=True, use_batch=True, **kwargs)
Initialize self. See help(type(self)) for accurate signature.
kf = KalmanFilterPerformance(p_missing=0)
kf.time_method('filter')
0.14900339799987705
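Other configurations can be timed the same way. As a small illustrative sketch (assuming a CUDA device is available; the parameter values here are arbitrary examples), one could benchmark the standard, non-square-root filter with a larger batch:
# hypothetical example: time the standard filter on the GPU with a larger batch
kf_gpu = KalmanFilterPerformance(bs=32, use_sr_filter=False, device='cuda')
kf_gpu.time_method('filter')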
product_dict
product_dict (**kwargs)
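product_dict expands keyword arguments whose values are iterables into one dict per combination (see the example further down). A minimal sketch of such a helper, not necessarily the library's exact implementation:
from itertools import product

def product_dict_sketch(**kwargs):
    # yield one dict per combination of the iterable-valued keyword arguments
    keys = kwargs.keys()
    for values in product(*kwargs.values()):
        yield dict(zip(keys, values))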
perf_comb_params
perf_comb_params (method, n_obs=100, n_dim_obs=4, n_dim_state=3, n_dim_contr=3, bs=5, p_missing=0.3, init_method='random', use_sr_filter=True, device='cpu', use_conditional=True, use_batch=True)
perf_comb_params('filter')
shape: (1, 10)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
5 | "cpu" | 3 | 4 | 3 | 100 | 0.254086 | true | true | true |
Square Root Filter vs. Standard Filter
perf1 = perf_comb_params('filter', use_sr_filter=[True, False], rep=range(2))
perf1
shape: (200, 11)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | rep | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
5 | "cpu" | 3 | 4 | 3 | 100 | 0 | 0.274235 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 1 | 0.265272 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 2 | 0.261474 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 3 | 0.257838 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 4 | 0.262304 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 5 | 0.269207 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 6 | 0.252871 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 7 | 0.262902 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 8 | 0.320611 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 9 | 0.359268 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 10 | 0.351303 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 11 | 0.349618 | true | true | true |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5 | "cpu" | 3 | 4 | 3 | 100 | 88 | 0.251293 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 89 | 0.258562 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 90 | 0.249729 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 91 | 0.25231 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 92 | 0.254119 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 93 | 0.2571 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 94 | 0.252648 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 95 | 0.251427 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 96 | 0.249157 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 97 | 0.252483 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 98 | 0.250241 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 99 | 0.252376 | true | true | false |
perf1.groupby('use_sr_filter').agg(pl.col("time").mean()).with_column(
    pl.when(pl.col("use_sr_filter"))
      .then(pl.lit("Square Root Filter"))
      .otherwise(pl.lit("Standard Filter"))
      .alias("Filter type")
)
shape: (2, 3)
use_sr_filter | time | Filter type |
---|---|---|
bool | f64 | str |
true | 0.268333 | "Square Root Fi... |
false | 0.250166 | "Standard Filte... |
perf1 = perf1.with_column(
    pl.when(pl.col("use_sr_filter"))
      .then(pl.lit("Square Root Filter"))
      .otherwise(pl.lit("Standard Filter"))
      .alias("Filter type")
)
plot_perf_sr = alt.Chart(perf1.to_pandas()).mark_boxplot(size=50).encode(
    x=alt.X('Filter type', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('time', scale=alt.Scale(zero=False), title="time [s]"),
    color=alt.Color('Filter type',
                    scale=alt.Scale(scheme='accent'))
).properties(width=300)
plot_perf_sr
CPU vs GPU
gpu_best = perf_comb_params('filter', bs=1, n_obs=5, n_dim_contr=5, n_dim_obs=5, n_dim_state=5,
                            device=['cpu', 'cuda'], use_sr_filter=[True, False], p_missing=0, rep=2, use_batch=[True, False])
gpu_best
shape: (160, 12)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | p_missing | rep | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 0 | 0.134151 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 1 | 0.097733 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 2 | 0.074773 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 3 | 0.07477 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 4 | 0.092972 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 5 | 0.112495 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 6 | 0.106605 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 7 | 0.103665 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 8 | 0.094165 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 9 | 0.105327 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 10 | 0.096838 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 11 | 0.080813 | true | true | true |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 8 | 8.77501 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 9 | 10.170727 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 10 | 11.289223 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 11 | 11.012791 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 12 | 9.845103 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 13 | 9.998321 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 14 | 10.513864 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 15 | 9.192036 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 16 | 10.006169 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 17 | 8.915112 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 18 | 8.832785 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 19 | 9.227368 | false | true | false |
gpu_best.groupby(['device', 'use_batch']).agg(pl.col("time").mean())
shape: (4, 3)
device | use_batch | time |
---|---|---|
str | bool | f64 |
"cuda" | false | 9.602944 |
"cpu" | false | 4.560856 |
"cuda" | true | 0.274758 |
"cpu" | true | 0.083738 |
kwargs = {'a': 1, 'b': (1,2)}
kwargs = {key: tuplify(arg) for key, arg in kwargs.items()}
list(product_dict(**kwargs))
[{'a': 1, 'b': 1}, {'a': 1, 'b': 2}]
method = kf.get_method('filter')
from timeit import timeit
timeit('method()', globals={'method': method}, number=10)
0.15532574900134932
Performance
def compare_performance(n_obs, n_dim_obs, n_dim_state, n_dim_contr, bs, dtype=torch.float64):
    # note: the benchmark calls (e.g. %timeit) that produce the timings printed below are not shown here
    kf_cuda = KalmanFilter.init_random(n_dim_obs, n_dim_state, dtype=dtype).cuda()
    data_cuda, mask_cuda = get_test_data(n_dim_obs, n_dim_state, bs=bs, device="cuda", dtype=dtype)
    print("GPU")
    kf_cuda = KalmanFilter.init_random(n_dim_obs, n_dim_state, dtype=dtype)
    data_cuda, mask_cuda = get_test_data(n_dim_obs, n_dim_state, bs=bs, dtype=dtype)
    print("CPU")
    print("No batches CPU")
    print("No batches GPU")
compare_performance(100, 2, 2, 100)
GPU
87.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
12.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
154 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10, 10, 200)
GPU
2.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Float64
compare_performance(100, 2, 2, 100, dtype=torch.float64)
GPU
100 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.29 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
159 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10, 10, 200, dtype=torch.float64)
GPU
2.22 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.35 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.01 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Stability
# regularize the raw covariance parameters of the square root filter: add a small
# diagonal jitter (sqrt(1e-5)) to Q_raw, R_raw and P0_raw to keep them well-conditioned
kSR.Q_raw = torch.nn.Parameter(kSR.Q_raw + eye_like(kSR.Q_raw) * torch.sqrt(torch.tensor(1e-5)))
kSR.R_raw = torch.nn.Parameter(kSR.R_raw + eye_like(kSR.R_raw) * torch.sqrt(torch.tensor(1e-5)))
kSR.P0_raw = torch.nn.Parameter(kSR.P0_raw + eye_like(kSR.P0_raw) * torch.sqrt(torch.tensor(1e-5)))
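eye_like comes from the library; as a stand-in (assuming it returns an identity matrix matching its argument's size, dtype and device), something like the following lets the snippet above run on its own:
import torch

def eye_like(t: torch.Tensor) -> torch.Tensor:
    # identity matrix with the same trailing dimension, dtype and device as t
    return torch.eye(t.shape[-1], dtype=t.dtype, device=t.device)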
fuzz_filter_SR
fuzz_filter_SR (n_iter=10, n_obs=50)
Compare the standard and the square root filter on randomly generated data; the returned frame holds, for each time step t, the mean absolute error (MAE) between the state covariances produced by the two implementations.
err_raw = fuzz_filter_SR(10, 120)
err = err_raw.groupby('t').agg([
    pl.col('MAE').median().alias("median"),
    pl.col('MAE').quantile(.75).alias("Q3"),
    pl.col('MAE').quantile(.25).alias("Q1"),
    pl.col('MAE').max().alias("max")
])
median = alt.Chart(err.to_pandas()).mark_line(color="black").encode(
    x=alt.X('t', title="Number of Iterations"),
    y=alt.Y('median', axis=alt.Axis(format=".1e"), scale=alt.Scale(type="log"), title="log MAE"),
    strokeDash=datum("median")
)
Q1 = alt.Chart(err.to_pandas()).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q1', strokeDash=datum("quantile"))
Q3 = alt.Chart(err.to_pandas()).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q3', strokeDash=datum("quantile"))
max = alt.Chart(err.to_pandas()).mark_line(color='black', strokeDash=[2,2]).encode(x='t', y='max', strokeDash=datum("max"))
p = (Q1 + Q3 + max + median).interactive().properties(title="Standard Filter vs Square Root Filter (Mean Absolute Error of state covariances)")
p
plot_err_sr_filter
plot_err_sr_filter (err_raw)
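Calling the convenience wrapper on the raw fuzzing results should produce the same layered chart as the manual construction above:
# presumably equivalent to the manual chart construction above
plot_err_sr_filter(err_raw)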